{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "binding-delta",
   "metadata": {
    "papermill": {
     "duration": 0.016304,
     "end_time": "2021-03-22T20:29:23.476444",
     "exception": false,
     "start_time": "2021-03-22T20:29:23.460140",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# spark_sql_interactive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1aeda72-a810-470b-ae65-40a12440f539",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import re\n",
    "import sys\n",
    "\n",
    "from pyspark import SparkConf, SparkContext\n",
    "from pyspark.sql import SparkSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13e202c5-5e40-4abd-bf5a-431409d521ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel,\n",
    "# whereas !pip runs whichever pip executable is first on the shell PATH.\n",
    "%pip install apache-iceberg"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb97e294-9399-4d96-a95c-8ad7e29a2872",
   "metadata": {},
   "source": [
    "Interactively execute arbitrary SQL queries against CSV and PARQUET files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abstract-cambridge",
   "metadata": {
    "papermill": {
     "duration": 0.012801,
     "end_time": "2021-03-22T20:29:25.972462",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.959661",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# name of resulting file (default: data_result.csv)\n",
    "output_result_file = os.environ.get('output_result_file', 'data_result.csv')\n",
    "\n",
    "# file name for CSV or PARQUET file - must end with .csv or .parquet (default: data.csv)\n",
    "data_file = os.environ.get('data_file', 'data.csv')\n",
    "\n",
    "# url of the spark master (default: local mode)\n",
    "master = os.environ.get('master', \"local[*]\")\n",
    "\n",
    "# data_dir temporary data storage for local execution (default: ../../data/)\n",
    "data_dir = os.environ.get('data_dir', '../../data/')\n",
    "\n",
    "# sql statement to execute, table name == df, example: select * from df (no default: None when unset)\n",
    "sql = os.environ.get('sql')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55f9b39f-2c8c-4ab5-b4f5-513357bf20ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Command line arguments of the form name=value override the defaults above:\n",
    "# each one is bound as a notebook-level string variable called `name`.\n",
    "# The name must be identifier-shaped; the value is everything after the FIRST '='.\n",
    "_parameter_pattern = re.compile(r'([A-Za-z_][A-Za-z0-9_]*)=(.*)', re.DOTALL)\n",
    "\n",
    "# kept for backward compatibility: the accepted arguments rendered as name=\"value\"\n",
    "parameters = []\n",
    "\n",
    "for _argument in sys.argv:\n",
    "    # fullmatch rejects anything that is not exactly name=value (e.g. '--ip=...', '=x', '1a=x')\n",
    "    _parameter_match = _parameter_pattern.fullmatch(_argument)\n",
    "    if _parameter_match is None:\n",
    "        continue\n",
    "    _name, _value = _parameter_match.groups()\n",
    "    parameter = _name + '=\"' + _value + '\"'\n",
    "    parameters.append(parameter)\n",
    "    logging.warning('Parameter: ' + parameter)\n",
    "    # Assign directly instead of exec(): the value is taken verbatim, so '=' signs, quotes\n",
    "    # and backslashes inside it are preserved and can never be executed as code.\n",
    "    globals()[_name] = _value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "welsh-grave",
   "metadata": {
    "papermill": {
     "duration": 4.178678,
     "end_time": "2021-03-22T20:29:30.176328",
     "exception": false,
     "start_time": "2021-03-22T20:29:25.997650",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Reuse the active SparkContext, or start one against the configured master URL.\n",
    "sc = SparkContext.getOrCreate(conf=SparkConf().setMaster(master))\n",
    "\n",
    "# Get the existing SparkSession, or create one if there is none yet.\n",
    "spark = (\n",
    "    SparkSession\n",
    "    .builder\n",
    "    .getOrCreate()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b68e981-91e4-4a2d-bfa0-1467e634efa8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check that the stocator jar is present at the expected location.\n",
    "# NOTE(review): absolute path into one developer's home directory - this only works on that machine.\n",
    "!ls /home/romeokienzler/gitco/claimed/component-library/transform/spark-sql-interactive/stocator-1.1.5-jar-with-dependencies.jar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33c06062-d2bf-4c65-87a0-b83ba02d7f9e",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "# Submit app.py to Spark with the Iceberg runtime, the Iceberg SQL extensions and two\n",
    "# catalogs: spark_catalog (type hive) and local (type hadoop, warehouse under $PWD).\n",
    "# APP_PY and STOCATOR_JAR default to the original developer checkout; export them\n",
    "# before starting Jupyter to point at your own copies.\n",
    "APP_PY=\"${APP_PY:-/home/romeokienzler/gitco/claimed/component-library/transform/spark-sql-interactive/app.py}\"\n",
    "STOCATOR_JAR=\"${STOCATOR_JAR:-/home/romeokienzler/gitco/stocator/target/stocator-1.1.5-jar-with-dependencies.jar}\"\n",
    "\n",
    "# Expansions are quoted so that paths containing spaces are passed as single arguments.\n",
    "spark-submit --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:0.14.1 \\\n",
    "    --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \\\n",
    "    --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \\\n",
    "    --conf spark.sql.catalog.spark_catalog.type=hive \\\n",
    "    --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \\\n",
    "    --conf spark.sql.catalog.local.type=hadoop \\\n",
    "    --jars \"$STOCATOR_JAR\" \\\n",
    "    --conf \"spark.sql.catalog.local.warehouse=$PWD/warehouse\" \\\n",
    "    --py-files \"$APP_PY\" \"$APP_PY\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90b9e3fd-1524-4de0-b1f7-f7cb9fcee1f2",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "# Start spark-sql with the Iceberg runtime, the Iceberg SQL extensions and two catalogs:\n",
    "# spark_catalog (type hive) and local (type hadoop, warehouse under the current directory).\n",
    "# The warehouse setting is quoted so a working directory containing spaces stays one argument.\n",
    "spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:0.14.1 \\\n",
    "    --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \\\n",
    "    --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \\\n",
    "    --conf spark.sql.catalog.spark_catalog.type=hive \\\n",
    "    --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \\\n",
    "    --conf spark.sql.catalog.local.type=hadoop \\\n",
    "    --conf \"spark.sql.catalog.local.warehouse=$PWD/warehouse\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa6d442e-c5c7-41b1-a7e5-bdace90a147d",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "\n",
    "# Same Iceberg setup as the previous cell, plus the Nessie SQL extensions and a catalog\n",
    "# named `nessie`. The nessie settings are passed as --conf flags: conf.set(...) calls are\n",
    "# not shell syntax and would abort this cell with a bash syntax error.\n",
    "# NOTE(review): NessieSparkSessionExtensions is presumably not part of the Iceberg runtime\n",
    "# listed in --packages; a nessie-spark-extensions artifact likely has to be added - confirm\n",
    "# the matching coordinates/version.\n",
    "# NOTE(review): /path/to/warehouse is a placeholder and must be replaced with a real location.\n",
    "spark-sql --packages org.apache.iceberg:iceberg-spark-runtime-3.2_2.12:0.14.1 \\\n",
    "    --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.projectnessie.spark.extensions.NessieSparkSessionExtensions \\\n",
    "    --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog \\\n",
    "    --conf spark.sql.catalog.spark_catalog.type=hive \\\n",
    "    --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \\\n",
    "    --conf spark.sql.catalog.local.type=hadoop \\\n",
    "    --conf \"spark.sql.catalog.local.warehouse=$PWD/warehouse\" \\\n",
    "    --conf spark.sql.catalog.nessie=org.apache.iceberg.spark.SparkCatalog \\\n",
    "    --conf spark.sql.catalog.nessie.catalog-impl=org.apache.iceberg.nessie.NessieCatalog \\\n",
    "    --conf spark.sql.catalog.nessie.uri=https://nessie.u7fqdzt4f98.eu-de.codeengine.appdomain.cloud/api/v1 \\\n",
    "    --conf spark.sql.catalog.nessie.ref=main \\\n",
    "    --conf spark.sql.catalog.nessie.warehouse=/path/to/warehouse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44959b7f-3f16-4df5-a62a-44123e6b0e30",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1 (main, Dec  7 2022, 00:00:00) [GCC 12.2.1 20221121 (Red Hat 12.2.1-4)]"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 470.538548,
   "end_time": "2021-03-22T20:37:13.369954",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "output_path": "/home/jovyan/work/examples/pipelines/pairs/component-library/transform/spark-csv-to-parquet.ipynb",
   "parameters": {},
   "start_time": "2021-03-22T20:29:22.831406",
   "version": "2.3.3"
  },
  "vscode": {
   "interpreter": {
    "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
